import scvelo as scv
import scanpy as sc
import cellrank as cr
import loompy as lp
import numpy as np
import pandas as pd
import re
import os
scv.settings.verbosity = 3
scv.settings.set_figure_params('scvelo')
cr.settings.verbosity = 2
import warnings
warnings.simplefilter('ignore', category=UserWarning)
warnings.simplefilter('ignore', category=FutureWarning)
warnings.simplefilter('ignore', category=DeprecationWarning)
os.chdir('/research/peer/fdeckert/FD20200109SPLENO')
# Import velocyto loom as anndata
adata = scv.read_loom('data/object/velocyto.loom')
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
if not adata.var_names.is_unique:
    adata.var_names_make_unique()
# QC meta data
meta = pd.read_csv('data/object/seurat/meta/meta.csv', index_col=0)
# Filter adata by meta CellID and merge
adata = adata[meta.index]
adata.obs = adata.obs.merge(meta, how='left', left_index=True, right_index=True, suffixes=('', '_meta'))  # non-empty right suffix avoids silently duplicated columns if names overlap
adata.obs['sample_name'] = adata.obs['sample_name'].astype('category')
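# The proportions plot below shows the spliced/unspliced fraction per sample;
# a very low unspliced fraction can make velocity estimates unreliable.
# Optional sanity check that the loom layers velocity needs are present:
print(adata.layers.keys())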
scv.pl.proportions(adata, groupby='sample_name')
adata_1 = adata[(adata.obs['treatment'] == 'NaCl')].copy()
adata_2 = adata[(adata.obs['treatment'] == 'CpG')].copy()
scv.pp.filter_and_normalize(adata_1, min_shared_counts=20, n_top_genes=2000)
sc.tl.pca(adata_1)
sc.pp.neighbors(adata_1, n_pcs=30, n_neighbors=30)
scv.pp.filter_and_normalize(adata_2, min_shared_counts=20, n_top_genes=2000)
sc.tl.pca(adata_2)
sc.pp.neighbors(adata_2, n_pcs=30, n_neighbors=30)
Filtered out 26244 genes that are detected 20 counts (shared).
Normalized count data: X, spliced, unspliced.
Extracted 2000 highly variable genes.
Logarithmized X.
Filtered out 26093 genes that are detected 20 counts (shared).
Normalized count data: X, spliced, unspliced.
Extracted 2000 highly variable genes.
Logarithmized X.
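# kNN-smoothed first-order moments (Ms, Mu) of spliced/unspliced counts,
# required as input by the dynamical model below.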
scv.pp.moments(adata_1, n_pcs=30, n_neighbors=30)
scv.pp.moments(adata_2, n_pcs=30, n_neighbors=30)
computing moments based on connectivities
finished (0:00:02) --> added
'Ms' and 'Mu', moments of un/spliced abundances (adata.layers)
computing moments based on connectivities
finished (0:00:03) --> added
'Ms' and 'Mu', moments of un/spliced abundances (adata.layers)
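# Fit the full splicing dynamics per gene via EM; by far the slowest step
# here (roughly 35 and 56 min per dataset on 8 cores, per the logs below).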
scv.tl.recover_dynamics(adata_1, n_jobs=8)
scv.tl.recover_dynamics(adata_2, n_jobs=8)
recovering dynamics (using 8/32 cores)
finished (0:34:48) --> added
'fit_pars', fitted parameters for splicing dynamics (adata.var)
recovering dynamics (using 8/32 cores)
finished (0:56:09) --> added
'fit_pars', fitted parameters for splicing dynamics (adata.var)
scv.tl.velocity(adata_1, mode='dynamical')
scv.tl.velocity(adata_2, mode='dynamical')
computing velocities
finished (0:00:21) --> added
'velocity', velocity vectors for each individual cell (adata.layers)
computing velocities
finished (0:00:35) --> added
'velocity', velocity vectors for each individual cell (adata.layers)
scv.tl.velocity_graph(adata_1)
scv.tl.velocity_graph(adata_2)
computing velocity graph (using 1/32 cores)
finished (0:00:47) --> added
'velocity_graph', sparse matrix with cosine correlations (adata.uns)
computing velocity graph (using 1/32 cores)
finished (0:01:51) --> added
'velocity_graph', sparse matrix with cosine correlations (adata.uns)
sc.tl.umap(adata_1)
sc.tl.umap(adata_2)
adata_1.write('data/object/seurat_sct_nacl/scvelo.h5ad')
adata_2.write('data/object/seurat_sct_cpg/scvelo.h5ad')
# adata_1 = sc.read_h5ad('data/object/seurat_sct_nacl/scvelo.h5ad')
# adata_2 = sc.read_h5ad('data/object/seurat_sct_cpg/scvelo.h5ad')
... storing 'treatment' as categorical
... storing 'cellranger_class' as categorical
... storing 'qc_class' as categorical
... storing 'ery_qc' as categorical
... storing 'treatment' as categorical
... storing 'cellranger_class' as categorical
... storing 'qc_class' as categorical
... storing 'ery_qc' as categorical
# umap_1 = pd.read_csv('data/object/components/reductions/scanvi_prog_nacl_umap_nno.csv', index_col=0)
# umap_1 = umap_1[np.isin(umap_1.index, adata_1.obs.index)]
# umap_1 = umap_1.reindex(adata_1.obs.index).to_numpy()
# adata_1.obsm['X_umap'] = umap_1
# umap_2 = pd.read_csv('data/object/components/reductions/scanvi_prog_cpg_umap_nno.csv', index_col=0)
# umap_2 = umap_2[np.isin(umap_2.index, adata_2.obs.index)]
# umap_2 = umap_2.reindex(adata_2.obs.index).to_numpy()
# adata_2.obsm['X_umap'] = umap_2
# cluster_1 = pd.read_csv('data/object/components/cluster/scanvi_prog_nacl_snn_res.0.8.csv', index_col=0)
# cluster_1 = cluster_1[np.isin(cluster_1.index, adata_1.obs.index)]
# cluster_1 = cluster_1.reindex(adata_1.obs.index)
# adata_1.obs['cluster'] = cluster_1
# cluster_2 = pd.read_csv('data/object/components/cluster/scanvi_prog_cpg_snn_res.0.8.csv', index_col=0)
# cluster_2 = cluster_2[np.isin(cluster_2.index, adata_2.obs.index)]
# cluster_2 = cluster_2.reindex(adata_2.obs.index)
# adata_2.obs['cluster'] = cluster_2
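# Stream plots of the velocity field on the UMAP, coloured by the Seurat
# clustering; optionally pass save='...' to also write the figure to
# scv.settings.figdir.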
scv.pl.velocity_embedding_stream(adata_1, basis="umap", legend_fontsize=12, title="", smooth=0.8, min_mass=4, color="SCT_snn_res.0.8")
scv.pl.velocity_embedding_stream(adata_2, basis="umap", legend_fontsize=12, title="", smooth=0.8, min_mass=4, color="SCT_snn_res.0.8")
computing velocity embedding
finished (0:00:06) --> added
'velocity_umap', embedded velocity vectors (adata.obsm)
computing velocity embedding
finished (0:00:14) --> added
'velocity_umap', embedded velocity vectors (adata.obsm)
cr.tl.terminal_states(adata_1, cluster_key="SCT_snn_res.0.8", weight_connectivities=0.2)
cr.tl.terminal_states(adata_2, cluster_key="SCT_snn_res.0.8", weight_connectivities=0.2)
Computing transition matrix based on logits using `'deterministic'` mode
Estimating `softmax_scale` using `'deterministic'` mode
Setting `softmax_scale=6.2546`
Finish (0:00:37)
Using a connectivity kernel with weight `0.2`
Computing transition matrix based on `adata.obsp['connectivities']`
Finish (0:00:00)
Computing eigendecomposition of the transition matrix
Adding `adata.uns['eig_fwd']`
`.eigendecomposition`
Finish (0:00:01)
Computing Schur decomposition
Adding `adata.uns['eig_fwd']`
`.eigendecomposition`
`.schur`
`.schur_matrix`
Finish (0:00:09)
Computing `4` macrostates
Adding `.macrostates_memberships`
`.macrostates`
`.schur`
`.coarse_T`
`.coarse_stationary_distribution`
Finish (0:00:03)
Adding `adata.obs['terminal_states_probs']`
`adata.obs['terminal_states']`
`adata.obsm['macrostates_fwd']`
`.terminal_states_probabilities`
`.terminal_states`
Finish
Computing transition matrix based on logits using `'deterministic'` mode
Estimating `softmax_scale` using `'deterministic'` mode
Setting `softmax_scale=4.2185`
Finish (0:00:59)
Using a connectivity kernel with weight `0.2`
Computing transition matrix based on `adata.obsp['connectivities']`
Finish (0:00:00)
Computing eigendecomposition of the transition matrix
Adding `adata.uns['eig_fwd']`
`.eigendecomposition`
Finish (0:00:01)
Computing Schur decomposition
Adding `adata.uns['eig_fwd']`
`.eigendecomposition`
`.schur`
`.schur_matrix`
Finish (0:00:00)
Computing `6` macrostates
Adding `.macrostates_memberships`
`.macrostates`
`.schur`
`.coarse_T`
`.coarse_stationary_distribution`
Finish (0:00:06)
Adding `adata.obs['terminal_states_probs']`
`adata.obs['terminal_states']`
`adata.obsm['macrostates_fwd']`
`.terminal_states_probabilities`
`.terminal_states`
Finish
cr.pl.terminal_states(adata_1)
cr.pl.terminal_states(adata_2)
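# Quick look (optional sketch): cross-tabulate the predicted terminal states
# against the Seurat clusters; 'terminal_states' is NaN for unassigned cells.
print(pd.crosstab(adata_1.obs['SCT_snn_res.0.8'], adata_1.obs['terminal_states']))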
cr.tl.initial_states(adata_1, cluster_key="SCT_snn_res.0.8")
cr.tl.initial_states(adata_2, cluster_key="SCT_snn_res.0.8")
Computing transition matrix based on logits using `'deterministic'` mode
Estimating `softmax_scale` using `'deterministic'` mode
Setting `softmax_scale=6.2546`
Finish (0:00:50)
Using a connectivity kernel with weight `0.2`
Computing transition matrix based on `adata.obsp['connectivities']`
Finish (0:00:00)
Computing eigendecomposition of the transition matrix
Adding `adata.uns['eig_bwd']`
`.eigendecomposition`
Finish (0:00:01)
Computing Schur decomposition
When computing macrostates, choose a number of states NOT in `[4]`
Adding `adata.uns['eig_bwd']`
`.eigendecomposition`
`.schur`
`.schur_matrix`
Finish (0:00:00)
Computing `6` macrostates
Adding `.macrostates_memberships`
`.macrostates`
`.schur`
`.coarse_T`
`.coarse_stationary_distribution`
Finish (0:00:06)
Adding `adata.obs['initial_states_probs']`
`adata.obs['initial_states']`
`adata.obsm['macrostates_bwd']`
`.terminal_states_probabilities`
`.terminal_states`
Finish
Computing transition matrix based on logits using `'deterministic'` mode
Estimating `softmax_scale` using `'deterministic'` mode
Setting `softmax_scale=4.2185`
Finish (0:01:23)
Using a connectivity kernel with weight `0.2`
Computing transition matrix based on `adata.obsp['connectivities']`
Finish (0:00:00)
Computing eigendecomposition of the transition matrix
Adding `adata.uns['eig_bwd']`
`.eigendecomposition`
Finish (0:00:01)
Computing Schur decomposition
Adding `adata.uns['eig_bwd']`
`.eigendecomposition`
`.schur`
`.schur_matrix`
Finish (0:00:00)
Computing `2` macrostates
Adding `.macrostates_memberships`
`.macrostates`
`.schur`
`.coarse_T`
`.coarse_stationary_distribution`
Finish (0:00:01)
Adding `adata.obs['initial_states_probs']`
`adata.obs['initial_states']`
`adata.obsm['macrostates_bwd']`
`.terminal_states_probabilities`
`.terminal_states`
Finish
cr.pl.initial_states(adata_1, discrete=True)
cr.pl.initial_states(adata_2, discrete=True)
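# Analogous optional check for the backward process:
# print(adata_1.obs['initial_states'].value_counts())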
cr.tl.lineages(adata_1)
cr.tl.lineages(adata_2)
Computing lineage probabilities towards terminal states
Computing absorption probabilities
Adding `adata.obsm['to_terminal_states']`
`.absorption_probabilities`
Finish (0:00:18)
Adding lineages to `adata.obsm['to_terminal_states']`
Finish (0:00:18)
Computing lineage probabilities towards terminal states
Computing absorption probabilities
Adding `adata.obsm['to_terminal_states']`
`.absorption_probabilities`
Finish (0:00:21)
Adding lineages to `adata.obsm['to_terminal_states']`
Finish (0:00:21)
cr.pl.lineages(adata_1, same_plot=False)
cr.pl.lineages(adata_2, same_plot=False)
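# Possible follow-ups from the CellRank 1.x tutorial (sketch, not run here):
# rank putative driver genes per lineage and compute a fate-informed latent time.
# cr.tl.lineage_drivers(adata_1)
# scv.tl.latent_time(adata_1, root_key='initial_states_probs', end_key='terminal_states_probs')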
from cellrank.tl.kernels import VelocityKernel
vk = VelocityKernel(adata_1).compute_transition_matrix()
Computing transition matrix based on logits using `'deterministic'` mode
Estimating `softmax_scale` using `'deterministic'` mode
Setting `softmax_scale=6.2546`
Finish (0:00:37)
from cellrank.tl.kernels import ConnectivityKernel
ck = ConnectivityKernel(adata_1).compute_transition_matrix()
Computing transition matrix based on `adata.obsp['connectivities']`
Finish (0:00:00)
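# Blend the kernels: 0.8 weight on RNA velocity, 0.2 on transcriptomic
# similarity, mirroring weight_connectivities=0.2 in the high-level calls above.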
combined_kernel = 0.8 * vk + 0.2 * ck
from cellrank.tl.estimators import GPCCA
g = GPCCA(combined_kernel)
WARNING: Computing transition matrix using the default parameters
g.compute_schur(n_components=20)
g.plot_spectrum()
Computing Schur decomposition
When computing macrostates, choose a number of states NOT in `[14]`
Adding `adata.uns['eig_fwd']`
`.eigendecomposition`
`.schur`
`.schur_matrix`
Finish (0:00:01)
g.plot_schur(use=3)
g.compute_macrostates(n_states=3, cluster_key="SCT_snn_res.0.8")
g.plot_macrostates()
Computing `3` macrostates
Adding `.macrostates_memberships`
`.macrostates`
`.schur`
`.coarse_T`
`.coarse_stationary_distribution`
Finish (0:00:01)
g.plot_macrostates(same_plot=False)
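# Possible continuation with the low-level GPCCA API (sketch, not run here):
# g.set_terminal_states_from_macrostates()
# g.compute_absorption_probabilities()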